rm(list = ls())

#setwd("YOUR-WORKING-DIRECTORY")

set.seed(8888)

# Load necessary packages
library("list")
library("endorse")
library("rr")
library("lme4")
library("arm")
library("MASS")
library("coda")
library("xtable")
library("stargazer")

# population data for poststratification
load("population_data.RData")

########################################
## AUXILIARY INFORMATION AS COVARIATE ##
########################################

## LOAD AUXILIARY INFORMATION
load("ms-g11-county-vote-data.RData")
vote <- vote[vote$insample == 1, ]
aux.counties <- vote$yes26

## LOAD DATA
load("list-data.RData")
load("rr-data.RData")
load("end-data.RData")

## LISTWISE DELETION
#  LIST
list <- list[is.na(list$list.y)==F,]
list <- list[is.na(list$dem)==F,]
list <- list[is.na(list$gop)==F,]
list <- list[is.na(list$maleNA)==F,]
list <- list[is.na(list$ageNA)==F,]

#  RANDOMIZED RESPONSE
rr <- rr[is.na(rr$rr.dum)==F,]
rr <- rr[is.na(rr$dem)==F,]
rr <- rr[is.na(rr$gop)==F,]
rr <- rr[is.na(rr$maleNA)==F,]
rr <- rr[is.na(rr$ageNA)==F,]
rr <- rr[is.na(rr$age55plus)==F,]

## DESIGN PARAMETERS
#  LIST
J <- 4
#  RANDOMIZED RESPONSE
p <- .5
p1 <- .5
p0 <- 0

## NUMERIC GROUP INDICATORS
list$insample <- rep(1,nrow(list))
list$numeric.county <- as.numeric(list$county)
rr$insample <- rep(1,nrow(rr))
rr$numeric.county <- as.numeric(rr$county)

#### LIST EXPERIMENT
###  NO CONSTRAINTS
list.no.aux <- ictreg(list.y ~ dem + gop + maleNA + ageNA, 
                    data = list, treat = "treat", J = J,
                    method = "nls")

### AUX AS COVARIATE
list$true.outcome <- NA 
for (i in 1:nrow(list)) {
  list$true.outcome[i] <- aux.counties[list$numeric.county[i]]
}

list.aux.cov <- ictreg(list.y ~  dem + gop + maleNA + ageNA + true.outcome, 
                  data = list, treat = "treat", J = J,  
                  method = "nls", maxIter = 10000)

###  COUNTY CONSTRAINTS
h <- aux.counties
names(h) <- 1:19
group <- list$numeric.county

list.aux.county <- ictreg(list.y ~  dem + gop + maleNA + ageNA, 
                         data = list, treat = "treat", J = J,  
                         method = "nls", h = h, group = group, 
                         maxIter = 10000)


#### RANDOMIZED RESPONSE EXPERIMENT
###  NO CONSTRAINTS
while(!exists("rr.no.aux")) {
  try(
    rr.no.aux <- rrreg(rr.dum ~ dem + gop + maleNA + ageNA + age55plus, 
                       data = rr, p = p, p1 = p1, p0 = p0, design = "forced-known")
  )
}

### AUX AS COVARIATE
rr$true.outcome <- NA 
for (i in 1:nrow(rr)) {
  rr$true.outcome[i] <- aux.counties[rr$numeric.county[i]]
}

while(!exists("rr.aux.cov")) {
  try(
    rr.aux.cov <- rrreg(rr.dum ~ dem + gop + maleNA + ageNA + age55plus + true.outcome, 
                   data = rr, p = p, p1 = p1, p0 = p0, design = "forced-known")
  )
}

###  COUNTY CONSTRAINTS
h <- aux.counties
names(h) <- 1:19
group <- rr$numeric.county

rr.aux.county <- rrreg(rr.dum ~ dem + gop + maleNA + ageNA + age55plus, 
                    data = rr, p = p, p1 = p1, p0 = p0, design = "forced-known", 
                    h = h, group = as.character(group), start = coef(rr.no.aux))

##### SPECIFICATION TEST
list.table <- cbind(
  c(coef(list.no.aux)[1:5], NA), c(sqrt(diag(vcov(list.no.aux)))[1:5], NA), 
    coef(list.aux.cov)[1:6], sqrt(diag(vcov(list.aux.cov)))[1:6], 
      c(coef(list.aux.county)[1:5], NA), c(sqrt(diag(vcov(list.aux.county)))[1:5], NA)
  )
list.table <- rbind(list.table, rep(NA, ncol(list.table)))
list.table <- rbind(list.table, 
  c(rep(NA, ncol(list.table) - 2), 
    list.aux.county$J.stat, list.aux.county$overid.p))
colnames(list.table) <- rep(c("Est.", "S.E."), ncol(list.table)/2)
rownames(list.table) <- c("(Intercept)", "Democrat", "Republican", 
  "Male", "Age", "Aged 55 and over", "County-level Outcome", "Hansen test")

rr.table <- cbind(
  c(coef(rr.no.aux), NA), c(sqrt(diag(vcov(rr.no.aux))), NA), 
    coef(rr.aux.cov), sqrt(diag(vcov(rr.aux.cov))), 
      c(coef(rr.aux.county), NA), c(sqrt(diag(vcov(rr.aux.county))), NA)
  )
rr.table <- rbind(rr.table, 
  c(rep(NA, ncol(list.table) - 2),  
    rr.aux.county$J.stat, rr.aux.county$overid.p)
  )
colnames(rr.table) <- rep(c("Est.", "S.E."), ncol(rr.table)/2)
rownames(rr.table) <- rownames(list.table)

## #######
## TABLE 4
## #######
xtable(cbind(list.table, rr.table))



#### PRECINCT PREDICTIONS BY POSTSTRATIFICATION
# Load precinct-level vote data
load("ms-g11-precinct-vote-data.RData")

# Create county_precinct variable with no missing values
vote <- vote[is.na(vote$precinct)==F,]
vote$county_precinct <- with(vote, factor(paste(COUNTY, precinct, sep="/")))
vote$county_precinct_numeric <- as.numeric(vote$county_precinct)

# Create county-precinct variable in population data
pop.full$county_precinct <- with(pop.full, factor(paste(county, precinct, sep="/")))

# Merging numeric county-precinct variable from voting data and restricting population data to sampled precincts
pop <- merge(subset(vote, select=c(county_precinct, county_precinct_numeric)), pop.full, by="county_precinct")

#### LIST PREDICTIONS VIA POSTSTRATIFICATION
## Get SEs by quai-Bayesian simulation
# population data
x.pop <- subset(pop, select=c(county_precinct_numeric, dem, gop, maleNA, ageNA))
x.pop <- data.frame(rep(1,nrow(x.pop)), x.pop)
x.pop.cov <- x.pop
x.pop.cov$true.outcome <- aux.counties[as.numeric(pop$county)]

n <- 1:length(vote$county_precinct_numeric)
sims <- 1000

# Beta matrix is nsims x 5
noaux.par.treat <- mvrnorm(n=sims, 
                           mu=coef(list.no.aux)[1:(length(coef(list.no.aux))/2)], 
                           Sigma=vcov(list.no.aux)[1:(length(coef(list.no.aux))/2),1:(length(coef(list.no.aux))/2)])
aux.cov.treat <- mvrnorm(n=sims, 
                           mu=coef(list.aux.cov)[1:(length(coef(list.aux.cov))/2)], 
                           Sigma=vcov(list.aux.cov)[1:(length(coef(list.aux.cov))/2),1:(length(coef(list.aux.cov))/2)])
aux.par.treat.county <- mvrnorm(n=sims, 
                                mu=coef(list.aux.county)[1:(length(coef(list.aux.county))/2)], 
                                Sigma=vcov(list.aux.county)[1:(length(coef(list.aux.county))/2),1:(length(coef(list.aux.county))/2)])  

prec.pred.noaux <- array(NA, c(3,length(n)))
prec.pred.aux.cov <- array(NA, c(3,length(n)))
prec.pred.aux.county <- array(NA, c(3,length(n)))

counter <- 0

for (i in n) {

  # Create individual-level matrix of covariates for each county
  x <- x.pop[x.pop$county_precinct_numeric==i,]
  x <- x[,-2]

  x.cov <- x.pop.cov[x.pop.cov$county_precinct_numeric==i,]
  x.cov <- x.cov[,-2]

  prec.pred.noaux[,i] <- quantile(colMeans(invlogit(as.matrix(x) %*% t(noaux.par.treat)), na.rm=T), probs=c(.5, .025, .975))
  prec.pred.aux.cov[,i] <- quantile(colMeans(invlogit(as.matrix(x.cov) %*% t(aux.cov.treat)), na.rm=T), probs=c(.5, .025, .975))
  prec.pred.aux.county[,i] <- quantile(colMeans(invlogit(as.matrix(x) %*% t(aux.par.treat.county)), na.rm=T), probs=c(.5, .025, .975))
  
  counter <- counter + 1
  if (counter == 1) cat("Iteration: \n", counter) else cat(", ", counter)

}

#####
## PLOT RESULTS
#####

results.list <- list(prec.pred.noaux,
                prec.pred.aux.cov,
                prec.pred.aux.county)
approach <- c("prec.noaux",
              "prec.aux.cov",
              "prec.aux.county")

aux.precincts <- vote[order(vote$county_precinct_numeric), ]$yes26

#Calculate rmse
rmse <- NULL
for (i in 1:3) {
  print(i)
  rmse[i] <- sqrt(mean((results.list[[i]][1,] - aux.precincts)^2))
}

#Calculate bias
bias <- NULL
for (i in 1:3) {
  bias[i] <- (1-mean(results.list[[i]][1,])) - (1-mean(aux.precincts)) 
}

#Calculate correlation
cor <- NULL
for (i in 1:3) {
  cor[i] <- cor(aux.precincts, results.list[[i]][1,])
  print(cor.test(aux.precincts, results.list[[i]][1,]))
}

tab.list <- data.frame(approach, rmse, bias, cor)
print(tab.list)


#### RR PREDICTIONS FROM POSTSTRATIFICATION

####
## Predict precinct-level vote 
## Post-stratified estimates
####

## Get SEs using quasi-Bayesian simulation
# Beta matrix is nsims x 5
noaux.par.treat <- mvrnorm(n=sims, 
                           mu=coef(rr.no.aux), 
                           Sigma=vcov(rr.no.aux))
aux.cov.treat <- mvrnorm(n=sims, 
                           mu=coef(rr.aux.cov), 
                           Sigma=vcov(rr.aux.cov))
aux.par.treat.county <- mvrnorm(n=sims, 
                                mu=coef(rr.aux.county), 
                                Sigma=vcov(rr.aux.county))  

# simulation parameters
n <- 1:length(vote$county_precinct_numeric)
sims <- 1000

# population data
x.pop <- subset(pop, select=c(county_precinct_numeric, dem, gop, maleNA, ageNA, age55plus))
x.pop <- data.frame(rep(1,nrow(x.pop)), x.pop)
x.pop.cov <- x.pop
x.pop.cov$true.outcome <- aux.counties[as.numeric(pop$county)]

prec.pred.noaux <- array(NA, c(3,length(n)))
prec.pred.aux.cov <- array(NA, c(3,length(n)))
prec.pred.aux.county <- array(NA, c(3,length(n)))

counter <- 0

for (i in n) {
  
  # Create individual-level matrix of covariates for each county
  x <- x.pop[x.pop$county_precinct_numeric==i,]
  x <- x[,-2]

  x.cov <- x.pop.cov[x.pop.cov$county_precinct_numeric==i,]
  x.cov <- x.cov[,-2]

  prec.pred.noaux[,i] <- quantile(colMeans(invlogit(as.matrix(x) %*% t(noaux.par.treat)), na.rm=T), probs=c(.5, .025, .975))
  prec.pred.aux.cov[,i] <- quantile(colMeans(invlogit(as.matrix(x.cov) %*% t(aux.cov.treat)), na.rm=T), probs=c(.5, .025, .975))
  prec.pred.aux.county[,i] <- quantile(colMeans(invlogit(as.matrix(x) %*% t(aux.par.treat.county)), na.rm=T), probs=c(.5, .025, .975))
  
  counter <- counter + 1
  if (counter == 1) cat("Iteration: \n", counter) else cat(", ", counter)
}

#####
## PLOT RESULTS
#####

results.rr <- list(prec.pred.noaux,
                prec.pred.aux.cov,
                prec.pred.aux.county)
approach <- c("prec.noaux",
              "prec.aux.cov",
              "prec.aux.county")

aux.precincts <- vote[order(vote$county_precinct_numeric), ]$yes26

#Calculate rmse
rmse <- NULL
for (i in 1:3) {
  print(i)
  rmse[i] <- sqrt(mean((results.rr[[i]][1,] - aux.precincts)^2))
}

#Calculate bias
bias <- NULL
for (i in 1:3) {
  bias[i] <- (1-mean(results.rr[[i]][1,])) - (1-mean(aux.precincts)) 
}

#Calculate correlation
cor <- NULL
for (i in 1:3) {
  cor[i] <- cor(aux.precincts, results.rr[[i]][1,])
  print(cor.test(aux.precincts, results.rr[[i]][1,]))
}

tab.rr <- data.frame(approach, rmse, bias, cor)
print(tab.rr)

pdf("figure-7.pdf", height = 8, width = 13)
{
      par(mfrow = c(2, 3),
      #  par(mfrow = c(1, 2),
          las = 1, 
          mar = c(3, 2.5, 0.1, 0.1), 
          oma = c(1.5, 2.5, 2, 1.5),
          # oma = c(1.5, 3.5, 2, 1.5),
          mgp = c(1.8, 0.5, 0),
          xpd = FALSE,
          tck = -0.02, 
          cex = 1, 
          pty = "sq"
      )

      truth <- list(1-vote$yes26, 1-vote$yes26, 1-vote$yes26)
      pch <- 19
      
      for (i in 1:3){
        est <- 1-results.list[[i]][1,]
        low.est <- 1-results.list[[i]][2,]
        up.est <- 1-results.list[[i]][3,]
        plot(truth[[i]], est, type = "p", ylab = "", xlab = "", 
             col=rgb(0,0,0,alpha=1),
             pch = pch, 
             main="",
             xlim=c(0,1),
             ylim=c(0,1))
        mtext(text = "Actual", side = 1, line = 1.5, las = 0)
        # if (i == 4) 
        mtext(text = "Estimate", side = 2, line = 2.25, las = 0)
        abline(0,1, col="red")
        segments(truth[[i]], low.est, truth[[i]], up.est, lwd =  0.5, col=rgb(0,0,0,alpha=1))
        text(x = -0.015, y = 0.95, paste("bias = ", round(tab.list[i,3], 3), "\nRMSE = ", round(tab.list[i,2] ,3), "\ncor = ", round(tab.list[i,4] ,3), sep=""), 
           adj=0, cex = 0.8)
      }
      
      mtext("Without Auxiliary Information", side = 3, line = 0.5, outer = TRUE, at=.19, font=2, las = 0) 
      mtext("Auxiliary Information as Covariate", side = 3, line = 0.5, outer = TRUE, at=.52, font=2, las = 0) 
      mtext("Auxiliary Information via GMM", side = 3, line = 0.5, outer = TRUE, at=.85, font=2, las = 0) 

      mtext("List Experiment", side = 2, line = 0.75, outer = TRUE, at = .78, font=2, las = 0) 
      mtext("Randomized Response", side = 2, line = 0.75, outer = TRUE, at = .29, font=2, las = 0) 


      truth <- list(1-vote$yes26, 1-vote$yes26, 1-vote$yes26)
      pch <- 19
      
      for (i in 1:3) {
        est <- 1-results.rr[[i]][1,]
        low.est <- 1-results.rr[[i]][2,]
        up.est <- 1-results.rr[[i]][3,]
        plot(truth[[i]], est, type = "p", ylab = "", xlab = "", 
             col=rgb(0,0,0,alpha=1),
             pch = pch, 
             main="",
             xlim=c(0,1),
             ylim=c(0,1))
        mtext(text = "Actual", side = 1, line = 1.5, las = 0)
        # if (i == 4) 
        mtext(text = "Estimate", side = 2, line = 2.25, las = 0)
        abline(0,1, col="red")
        segments(truth[[i]], low.est, truth[[i]], up.est, lwd =  0.5, col=rgb(0,0,0,alpha=1))
        text(x = -0.015, y = 0.95, paste("bias = ", round(tab.rr[i,3], 3), "\nRMSE = ", round(tab.rr[i,2] ,3), "\ncor = ", round(tab.rr[i,4] ,3), sep=""), 
           adj=0, cex = 0.8)
      }
  
}
dev.off()

